Iterated Estimation of fMRI Data

Deconvolution within the Lyman framework, based on the least squares-separate (LS-S) method of Mumford et al. (2012). For each subject and run, iteratively fit one model per trial to estimate the $\beta$ (or $t$-statistic) for the condition of interest, while collapsing all other trials into a single nuisance regressor. The appropriate values are then extracted for all voxels within a given mask. GLM fitting uses OLS with an AR(1) noise model. This step assumes that the timeseries has been preprocessed and that all runs have been linearly transformed into the first run's space.
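
In outline, LS-S fits one GLM per trial: the trial of interest keeps its own regressor and all remaining trials in the run are collapsed into a single "other" regressor. A minimal sketch of the relabeling step, using hypothetical trial names (the main loop below does this with the actual design files):


In [ ]:
import copy
import pandas as pd

# Toy single-run design: one row per trial
toy = pd.DataFrame({'condition': ['trial-01', 'trial-02', 'trial-03'],
                    'onset': [0.0, 12.0, 24.0]})

for ev in toy.condition.unique():
    design_LSS = copy.deepcopy(toy)
    # Collapse all remaining trials into one nuisance regressor
    design_LSS.loc[design_LSS.condition != ev, 'condition'] = 'other'
    print design_LSS.condition.tolist()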

Import necessary packages


In [1]:
import pandas as pd
import json
from scipy import stats, signal, linalg
from sklearn.decomposition import PCA
import nibabel as nib
import nipype
from nipype import Node, SelectFiles, DataSink, IdentityInterface
import matplotlib.pyplot as plt  # backend is selected by %matplotlib inline below
from nipype.interfaces import fsl
from nipy.modalities.fmri.glm import FMRILinearModel
from nibabel import Nifti1Image, save
import numpy as np

import os
import os.path as op
import shutil
import sys
import copy

import lyman
import moss
from lyman import tools
from lyman import default_experiment_parameters
import lyman.workflows as wf
from moss import glm
import seaborn as sns

%matplotlib inline
sns.set(context="notebook", style="ticks", font="Arial")
pd.set_option('display.precision', 3)



Define helper functions


In [2]:
def plotSimilarityStruct(run_data, run_evs):
    from scipy.cluster.hierarchy import linkage, dendrogram
    from scipy.spatial.distance import pdist, squareform

    # Correlation distance between trial patterns (columns of run_data)
    data_dist = pdist(run_data.T, 'correlation')
    Y = linkage(data_dist, method='single')

    # Compute and plot first dendrogram (left of the heatmap).
    fig = plt.figure(figsize=(8, 8))
    # axes rect: [x, y, width, height]
    ax1 = fig.add_axes([0.05, 0.1, 0.2, 0.6])
    Z1 = dendrogram(Y, orientation='right', labels=run_evs, distance_sort=True)
    ax1.set_xticks([])

    # Compute and plot second dendrogram (above the heatmap).
    ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
    Z2 = dendrogram(Y)
    ax2.set_xticks([])
    ax2.set_yticks([])

    # Plot the distance matrix, reordered by the dendrogram leaves.
    axmatrix = fig.add_axes([0.37, 0.1, 0.6, 0.6])
    idx1 = Z1['leaves']
    idx2 = Z2['leaves']
    D = squareform(data_dist)
    D = D[idx1, :]
    D = D[:, idx2]
    im = axmatrix.matshow(D, aspect='auto', origin='lower', cmap=plt.cm.YlGnBu)
    axmatrix.set_xticks([])
    axmatrix.set_yticks([])

    # Plot colorbar.
    axcolor = fig.add_axes([1, 0.1, 0.02, 0.6])
    plt.colorbar(im, cax=axcolor)
    
    
def removeSegment(run_evs, sep, remove_seg):
    """Keep only the first `remove_seg` segments of each EV name."""
    return [sep.join(x.split(sep)[:remove_seg]) for x in list(run_evs)]

def transform_fisherZ(r):
    """Fisher r-to-z transform (equivalent to np.arctanh)."""
    return 0.5 * np.log((1 + r) / (1 - r))
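
A quick check of the helpers on hypothetical labels (not part of the pipeline): removeSegment collapses the trial-specific suffix so repeated presentations share a name, and the Fisher transform matches np.arctanh.


In [ ]:
demo_evs = ['face-01-a', 'face-01-b', 'house-02-a']
print removeSegment(demo_evs, '-', 2)  # ['face-01', 'face-01', 'house-02']

r = np.array([0.0, 0.5, 0.9])
print np.allclose(transform_fisherZ(r), np.arctanh(r))  # True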

Project-specific parameters


In [3]:
experiment = 'objfam'
altmodel = 'trial-prototype'
nruns = 12
subject_list = '/Volumes/groups/awagner/sgagnon/ObjFam/data/subids_subset_no23or19.txt'
unsmoothed = True
condition_labels = True  # if True, use condition_filename to map trials to conditions
condition_filename = 'trial-prototype-condition.csv'  # only used if condition_labels is True
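
The condition file is merged on subid, run, and condition further below, and its morphmem column is split on "_"; a hypothetical layout consistent with that usage (illustrative values, not the actual study file):


In [ ]:
# Expected condition-file columns (illustrative values only):
pd.DataFrame([('trial-01', 1, '40_old'),
              ('trial-02', 1, '100_new')],
             columns=['condition', 'run', 'morphmem'])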

In [4]:
project = lyman.gather_project_info()
exp = lyman.gather_experiment_info(experiment, altmodel)
group = np.loadtxt(subject_list, str).tolist()
exp_base = experiment
exp_name = "-".join([exp_base, altmodel])

data_dir = project["data_dir"]
analysis_dir = project["analysis_dir"]

smoothing = "unsmoothed" if unsmoothed else "smoothed"



Iterate through subjects and runs


In [ ]:
mask_name = 'lateraloccipital'
out_val = 't' # t or beta
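
With out_val set to 't', the loop below converts each trial's beta to a t-statistic by testing the first regressor (the trial EV) with a contrast vector that is 1 in that column and 0 elsewhere; a minimal illustration of that construction:


In [ ]:
num_reg = 5  # e.g., trial EV + 'other' + confound regressors
con = [[1] + [0]*(num_reg - 1)]
print con  # [[1, 0, 0, 0, 0]]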

In [ ]:
sub_mat = []
group_evs = []
for subid in group:
    print subid

    design_file = op.join(data_dir, subid, "design", exp["design_name"] + ".csv")
    sub_dmat = pd.read_csv(design_file)
    
    if condition_labels:
        condition_file = op.join(data_dir, subid, "design", condition_filename)
        cond_map = pd.read_csv(condition_file)
    
    # Subject-specific mask (registered to the first run's space), as bool
    mask_file = op.join(data_dir, subid, 'masks', mask_name + '.nii.gz')
    mask = nib.load(mask_file).get_data() == 1

    run_mat = []
    ev_list = []
    for run in range(1, nruns+1):
        print 'Run: ' + str(run)
        
        # Setup run specific directories
        # preproc timeseries registered to first run
        timeseries_dir = op.join(analysis_dir, experiment, subid, "reg/epi/unsmoothed/run_" + str(run))
        preproc_dir = op.join(analysis_dir, experiment, subid, "preproc/run_" + str(run))
        
        realign_file = op.join(preproc_dir, "realignment_params.csv")
        artifact_file = op.join(preproc_dir, "artifacts.csv")
        timeseries_file = op.join(timeseries_dir, "timeseries_xfm.nii.gz")
        # mask_file is defined once per subject above; a run-specific functional
        # mask would be op.join(timeseries_dir, "functional_mask_xfm.nii.gz")
        
        # Build the model design
        run_dmat = sub_dmat[sub_dmat.run == run]
        
        realign = pd.read_csv(realign_file)
        realign = realign.filter(regex="rot|trans").apply(stats.zscore)
        
        artifacts = pd.read_csv(artifact_file).max(axis=1)
        ntp = len(artifacts)
        tr = exp["TR"]
        
        hrf = getattr(glm, exp["hrf_model"])
        hrf = hrf(exp["temporal_deriv"], tr, **exp["hrf_params"])
        
        ev_mat = []
        for ev in run_dmat.condition.unique():
            ev_list.append(ev)
            
            design_LSS = copy.deepcopy(run_dmat)
            # relabel all other trials as one 'other' regressor (.loc avoids chained assignment)
            design_LSS.loc[design_LSS.condition != ev, 'condition'] = 'other'
            
            design_kwargs = dict(confounds=realign,
                                 artifacts=artifacts,
                                 tr=tr,
                                 condition_names=sorted(design_LSS.condition.unique()),  # sorted so the trial EV comes before 'other' (assumes trial names sort earlier than 'other')
                                 confound_pca=exp["confound_pca"],
                                 hpf_cutoff=exp["hpf_cutoff"])
            X = glm.DesignMatrix(design_LSS, hrf, ntp, **design_kwargs)
            
#             print ev
#             print X.design_matrix.columns
            
            # Fit model
            fmri_glm = FMRILinearModel(timeseries_file, np.array(X.design_matrix), mask=mask_file)
            fmri_glm.fit(do_scaling=True, model='ar1')

            # Get beta
            beta_hat = fmri_glm.glms[0].get_beta()
            
            # Output appropriate statistic
            if out_val == 'beta':
                ev_mat.append(beta_hat[0])
            elif out_val == 't':
                # Calc t-statistic
                num_reg = beta_hat.shape[0]
                con = [[1] + [0]*(num_reg-1)]
                t_map, = fmri_glm.contrast(con, con_id=ev, contrast_type='t')
                t_map = t_map.get_data()[mask].ravel()
                
                ev_mat.append(t_map)

        run_mat.append(ev_mat)
        
    sub_mat.append(run_mat)
    group_evs.append(ev_list)

In [ ]:
data = np.array(sub_mat)
evs = np.array(group_evs)

print 'Data shape (subid x run x trial x voxel): ' + str(data.shape)
print 'EVs shape (subid x trial): ' + str(evs.shape)

In [ ]:
# Sanity check: subject ID and that subject's first-run estimates
print group[5]
data[5, 0]

In [ ]:
sub_num = 8
run_data = data[sub_num,0]
run_data = np.vstack(run_data).T # voxel x ev
run_evs = evs[sub_num].reshape(nruns, -1)[0]  # EV labels for the first run

sns.corrplot(run_data[:, np.argsort(run_evs)],  # sort columns (EVs), not rows (voxels)
             names=run_evs[np.argsort(run_evs)],
             diag_names=False,
             annot=False, cmap_range=(-1, 1))

In [ ]:
df = pd.DataFrame(run_data, columns=run_evs)
corr_mat = df.corr()
sns.clustermap(corr_mat)

Compute pairwise correlations between the two presentations of each condition
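
Stripping the trial-specific suffix with removeSegment leaves each condition name appearing twice per run, so selecting that (duplicated) column name returns both presentations, and the off-diagonal of their 2x2 correlation matrix is the trial-to-trial pattern similarity. A toy illustration with random data and hypothetical labels:


In [ ]:
rng = np.random.RandomState(0)
toy = pd.DataFrame(rng.randn(50, 4),
                   columns=['cond-01', 'cond-01', 'cond-02', 'cond-02'])
print toy['cond-01'].shape              # (50, 2): both presentations
print toy['cond-01'].corr().iloc[0, 1]  # their pattern correlation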


In [ ]:
df_corr = pd.DataFrame(columns=['subid', 'run', 'condition', 'corr'])
df_condmap = pd.DataFrame()

for sub_num in range(data.shape[0]):
    
    subid = group[sub_num]
    sub_data = data[sub_num]
    sub_evs = evs[sub_num].reshape(nruns, -1)
    
    condition_file = op.join(data_dir, subid, "design", condition_filename)
    cond_map = pd.read_csv(condition_file)
    cond_map['subid'] = subid
    df_condmap = pd.concat([df_condmap, cond_map])
    
    for run in range(sub_data.shape[0]):
        
        run_data = sub_data[run]
        run_data = np.vstack(run_data).T # voxel x ev
        run_evs = sub_evs[run]

        run_conds = removeSegment(run_evs, '-', 2)
        df = pd.DataFrame(run_data, columns=run_conds)

        for cond in set(run_conds):
            # off-diagonal of the 2x2 matrix: correlation between the two presentations
            corr_value = df[cond].corr().iloc[0, 1]
            df_corr = df_corr.append([dict(subid=subid,
                                           run=run+1,
                                           condition=cond, 
                                           corr=corr_value)], 
                                     ignore_index=True)
            
            
df_corr.head()

In [ ]:
df_corr = df_corr.merge(df_condmap, on=['condition', 'run', 'subid'])
df_corr.head()

In [ ]:
sns.distplot(df_corr.ix[1500:3000]['corr'])

In [ ]:
# Split morphmem ("<morph>_<resp>") into separate columns
data = df_corr.join(pd.DataFrame(df_corr.morphmem.str.split('_').tolist(), 
                                 columns=['morph', 'resp'])).ix[1500:3000]

sns.factorplot(x='morph', y='corr', #hue='resp',dodge=0.1,
               ci=68, units='subid', data=data)

In [ ]:
data['morph_q'] = data.morph.astype(int)  # numeric morph level for the regression below

In [ ]:
data_group = data.groupby(['subid', 'morph']).mean().reset_index()

In [ ]:
data_group.head()

In [ ]:
data_group['z'] = transform_fisherZ(data_group['corr'])
data_group.head()

In [ ]:
sns.violinplot(x='morph', y='z', data=data_group, 
               inner="points")

In [ ]:
sns.coefplot("z~scale(morph_q)", 
             data=data_group,
             ci=68)
